In [1211]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import scipy as sp
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_predict
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima.model import ARIMAResults
from scipy.signal import detrend
In [1212]:
# Load Data

data_path = '/content/BaggageComplaints.csv'

bag_dat = pd.read_csv(data_path)

bag_dat.head
bag_dat.info()
print(bag_dat)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Airline    252 non-null    object
 1   Date       252 non-null    object
 2   Month      252 non-null    int64 
 3   Year       252 non-null    int64 
 4   Baggage    252 non-null    int64 
 5   Scheduled  252 non-null    int64 
 6   Cancelled  252 non-null    int64 
 7   Enplaned   252 non-null    int64 
dtypes: int64(6), object(2)
memory usage: 15.9+ KB
            Airline     Date  Month  Year  Baggage  Scheduled  Cancelled  \
0    American Eagle  01/2004      1  2004    12502      38276       2481   
1    American Eagle  02/2004      2  2004     8977      35762        886   
2    American Eagle  03/2004      3  2004    10289      39445       1346   
3    American Eagle  04/2004      4  2004     8095      38982        755   
4    American Eagle  05/2004      5  2004    10618      40422       2206   
..              ...      ...    ...   ...      ...        ...        ...   
247          United  08/2010      8  2010    14099      30637        344   
248          United  09/2010      9  2010     9435      28072        161   
249          United  10/2010     10  2010     9565      29144        140   
250          United  11/2010     11  2010     8597      27318        104   
251          United  12/2010     12  2010    14415      27619        599   

     Enplaned  
0      992360  
1     1060618  
2     1227469  
3     1234451  
4     1267581  
..        ...  
247   4263211  
248   3679517  
249   3952549  
250   3573268  
251   3493643  

[252 rows x 8 columns]
In [1213]:
bag_dat_sub = bag_dat.drop(columns=['Airline', 'Date', 'Month', 'Year'])


# Compute correlations
cormat = bag_dat_sub.corr()

# Round correlation matrix to 2 decimal places
cormat = cormat.round(2)

# Plot correlation matrix using a heatmap
plt.figure(figsize = (10,8))
sns.heatmap(cormat,annot=True, cmap = 'coolwarm')
plt.show()

American Eagle Baggage Claims¶

In [1214]:
bag_dat_American_Eagle = bag_dat[bag_dat['Airline'] == 'American Eagle']
bag_dat_American_Eagle.head(5)
Out[1214]:
Airline Date Month Year Baggage Scheduled Cancelled Enplaned
0 American Eagle 01/2004 1 2004 12502 38276 2481 992360
1 American Eagle 02/2004 2 2004 8977 35762 886 1060618
2 American Eagle 03/2004 3 2004 10289 39445 1346 1227469
3 American Eagle 04/2004 4 2004 8095 38982 755 1234451
4 American Eagle 05/2004 5 2004 10618 40422 2206 1267581
In [1215]:
bag_dat_sub_American_Eagle = bag_dat_American_Eagle.drop(columns=['Airline', 'Date', 'Month', 'Year'])


# Compute correlations
cormat = bag_dat_sub_American_Eagle.corr()

# Round correlation matrix to 2 decimal places
cormat = cormat.round(2)

# Plot correlation matrix using a heatmap
plt.figure(figsize = (10,8))
sns.heatmap(cormat,annot=True, cmap = 'coolwarm')
plt.show()
In [1216]:
# Convert date column to a date class variable

# Create a monthly index for the series
bag_dat_American_Eagle['Date'] = pd.to_datetime(bag_dat_American_Eagle['Date'], format="%m/%Y")
bag_ts_American_Eagle = pd.Series(bag_dat_American_Eagle['Baggage'].values, index=bag_dat_American_Eagle['Date'])

bag_ts_American_Eagle.index.freq = 'MS'


#Plot the series
plt.figure(figsize=(10, 6))
bag_ts_American_Eagle.plot()
plt.title("Baggage Claims for American Eagle Airlines")
plt.xlabel("Date")
plt.ylabel("# Baggage Claims")
plt.show()
<ipython-input-1216-c058082e6f6e>:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bag_dat_American_Eagle['Date'] = pd.to_datetime(bag_dat_American_Eagle['Date'], format="%m/%Y")
In [1217]:
#execute the Ljung Box test on the Coffee series using 10 lags
Bags_American_Eagle = bag_dat_American_Eagle[['Baggage']]

sm.stats.acorr_ljungbox(Bags_American_Eagle, lags=[10], return_df=True)
Out[1217]:
lb_stat lb_pvalue
10 363.970723 4.308185e-72
In [1218]:
#Plot the ACF and PACF for the coffee series

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(bag_ts_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(bag_ts_American_Eagle, ax=ax2_American_Eagle, zero=False)
plt.show()

For seasonal time series, the first step in the process is taking a seasonal difference. To do this, you need to include the seasonal_order(P,D,Q,s) option when you run the ARIMA procedure. P is the number of seasonal AR terms, Q is the number of seasonal MA terms, s is the length of the seasonal period (for example, s=12 if it takes 12 months to complete the seasonal cycle). To take a seasonal difference set D=1.

In [1219]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,0,0), seasonal_order=(0, 1, 0, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1220]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1221]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,0), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1222]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(1, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
In [1223]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,1), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
  warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
In [1224]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,2), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
In [1225]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,2), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
In [1226]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 2, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
In [1227]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()

Notice that the ACF and PACF for the residuals look like a typical autoregressive signature, so the next step might be to add p = 1 to include an AR(1) term in the model. This will not be sufficient to get the residuals to white noise. Recall that an MA term can help to smooth out a model after differencing. In this case we took a seasonal difference, so we'll include a seasonal MA term by setting Q=1 in the seasonal order.

In [1228]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(1, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(1, 0, 1)x(1, 1, 1, 12)   Log Likelihood                -665.168
Date:                          Mon, 23 Oct 2023   AIC                           1340.336
Time:                                  02:44:22   BIC                           1351.719
Sample:                              01-01-2004   HQIC                          1344.868
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.9713      0.027     36.332      0.000       0.919       1.024
ma.L1         -0.3991      0.094     -4.227      0.000      -0.584      -0.214
ar.S.L12       0.1188      0.126      0.940      0.347      -0.129       0.367
ma.S.L12      -0.9886      0.159     -6.217      0.000      -1.300      -0.677
sigma2      4.778e+06   3.42e-08    1.4e+14      0.000    4.78e+06    4.78e+06
===================================================================================
Ljung-Box (L1) (Q):                   3.28   Jarque-Bera (JB):                 1.83
Prob(Q):                              0.07   Prob(JB):                         0.40
Heteroskedasticity (H):               0.22   Skew:                             0.16
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.71
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2.29e+30. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1fa07f0>
In [1229]:
# convert to data frame
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()

prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1229]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 9044.082011 2725.486659 15362.677364
2004-03-01 8121.529686 2039.178630 14203.880742
2004-04-01 8331.565687 2276.700644 14386.430729
2004-05-01 7268.907708 1214.894887 13322.920529
2004-06-01 8014.247678 1961.363000 14067.132357
In [1230]:
# add back actuals --
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1230]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 9044.082011 2725.486659 15362.677364 8977
2004-03-01 8121.529686 2039.178630 14203.880742 10289
2004-04-01 8331.565687 2276.700644 14386.430729 8095
2004-05-01 7268.907708 1214.894887 13322.920529 10618
2004-06-01 8014.247678 1961.363000 14067.132357 13684
In [1231]:
# calc error
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolue value of error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- mape --
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- rmse
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 13.988%
RMSE = 2,840.712
In [1232]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,1), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
  warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(2, 0, 1)x(0, 1, 1, 12)   Log Likelihood                -666.623
Date:                          Mon, 23 Oct 2023   AIC                           1343.246
Time:                                  02:44:25   BIC                           1354.629
Sample:                              01-01-2004   HQIC                          1347.778
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.0264      0.109     -0.242      0.808      -0.240       0.187
ar.L2          0.9148      0.087     10.523      0.000       0.744       1.085
ma.L1          0.8751      0.114      7.651      0.000       0.651       1.099
ma.S.L12      -0.9898      0.159     -6.220      0.000      -1.302      -0.678
sigma2      4.643e+06   3.51e-08   1.32e+14      0.000    4.64e+06    4.64e+06
===================================================================================
Ljung-Box (L1) (Q):                   0.13   Jarque-Bera (JB):                 3.35
Prob(Q):                              0.72   Prob(JB):                         0.19
Heteroskedasticity (H):               0.22   Skew:                            -0.16
Prob(H) (two-sided):                  0.00   Kurtosis:                         4.01
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 8.05e+29. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3b660b0>
In [1233]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,2), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(1, 0, 2)x(0, 1, [1], 12)   Log Likelihood                -664.733
Date:                            Mon, 23 Oct 2023   AIC                           1339.466
Time:                                    02:44:30   BIC                           1350.849
Sample:                                01-01-2004   HQIC                          1343.998
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.9832      0.021     46.902      0.000       0.942       1.024
ma.L1         -0.3366      0.094     -3.571      0.000      -0.521      -0.152
ma.L2         -0.1252      0.117     -1.074      0.283      -0.354       0.103
ma.S.L12      -0.9926      0.154     -6.450      0.000      -1.294      -0.691
sigma2      4.568e+06   3.42e-08   1.34e+14      0.000    4.57e+06    4.57e+06
===================================================================================
Ljung-Box (L1) (Q):                   1.21   Jarque-Bera (JB):                 2.30
Prob(Q):                              0.27   Prob(JB):                         0.32
Heteroskedasticity (H):               0.24   Skew:                             0.28
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.68
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 5.9e+28. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3b66800>
In [1234]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,2), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(2, 0, 2)x(0, 1, [1], 12)   Log Likelihood                -665.093
Date:                            Mon, 23 Oct 2023   AIC                           1342.187
Time:                                    02:44:34   BIC                           1355.847
Sample:                                01-01-2004   HQIC                          1347.625
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          1.1811      1.039      1.136      0.256      -0.856       3.218
ar.L2         -0.1841      1.032     -0.178      0.858      -2.208       1.839
ma.L1         -0.5189      1.020     -0.509      0.611      -2.519       1.481
ma.L2         -0.0614      0.456     -0.135      0.893      -0.955       0.832
ma.S.L12      -0.9775      0.172     -5.690      0.000      -1.314      -0.641
sigma2      4.767e+06   7.25e-08   6.57e+13      0.000    4.77e+06    4.77e+06
===================================================================================
Ljung-Box (L1) (Q):                   1.08   Jarque-Bera (JB):                 1.77
Prob(Q):                              0.30   Prob(JB):                         0.41
Heteroskedasticity (H):               0.25   Skew:                             0.20
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.65
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 5.63e+30. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1b49600>
In [1235]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 2, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                       SARIMAX Results                                       
=============================================================================================
Dep. Variable:                                     y   No. Observations:                   84
Model:             ARIMA(1, 0, 1)x(0, 1, [1, 2], 12)   Log Likelihood                -665.213
Date:                               Mon, 23 Oct 2023   AIC                           1340.425
Time:                                       02:44:38   BIC                           1351.809
Sample:                                   01-01-2004   HQIC                          1344.957
                                        - 12-01-2010                                         
Covariance Type:                                 opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.9737      0.025     38.686      0.000       0.924       1.023
ma.L1         -0.4019      0.094     -4.257      0.000      -0.587      -0.217
ma.S.L12      -0.9000      0.167     -5.386      0.000      -1.228      -0.572
ma.S.L24      -0.0970      0.116     -0.836      0.403      -0.324       0.130
sigma2      4.722e+06   3.48e-08   1.36e+14      0.000    4.72e+06    4.72e+06
===================================================================================
Ljung-Box (L1) (Q):                   3.17   Jarque-Bera (JB):                 1.72
Prob(Q):                              0.08   Prob(JB):                         0.42
Heteroskedasticity (H):               0.23   Skew:                             0.17
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.68
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2.55e+30. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a02abaf0>
In [1236]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(1, 0, 0)x(0, 1, [1], 12)   Log Likelihood                -669.273
Date:                            Mon, 23 Oct 2023   AIC                           1344.546
Time:                                    02:44:39   BIC                           1351.376
Sample:                                01-01-2004   HQIC                          1347.265
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.9322      0.025     37.466      0.000       0.883       0.981
ma.S.L12      -0.8991      0.167     -5.397      0.000      -1.226      -0.573
sigma2       5.34e+06   3.34e-08    1.6e+14      0.000    5.34e+06    5.34e+06
===================================================================================
Ljung-Box (L1) (Q):                   0.40   Jarque-Bera (JB):                 5.16
Prob(Q):                              0.53   Prob(JB):                         0.08
Heteroskedasticity (H):               0.24   Skew:                            -0.21
Prob(H) (two-sided):                  0.00   Kurtosis:                         4.24
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2.03e+29. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3c61750>
In [1237]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(1, 0, 1)x(0, 1, 1, 12)   Log Likelihood                -665.483
Date:                          Mon, 23 Oct 2023   AIC                           1338.966
Time:                                  02:44:40   BIC                           1348.073
Sample:                              01-01-2004   HQIC                          1342.591
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.9790      0.022     45.408      0.000       0.937       1.021
ma.L1         -0.4075      0.096     -4.246      0.000      -0.596      -0.219
ma.S.L12      -0.9971      0.152     -6.539      0.000      -1.296      -0.698
sigma2      4.693e+06   3.27e-08   1.44e+14      0.000    4.69e+06    4.69e+06
===================================================================================
Ljung-Box (L1) (Q):                   2.60   Jarque-Bera (JB):                 1.88
Prob(Q):                              0.11   Prob(JB):                         0.39
Heteroskedasticity (H):               0.24   Skew:                             0.27
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.59
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 7.54e+28. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1addd20>
In [1238]:
# Plot ACF and PACF for residuals

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()

After reviewing the ACF and PACF for the resiuals from this model and noticing that the Ljung Box test (below) indicates that the residuals are white noise, this seems to be a reasonable model for the data.

In [1239]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1239]:
lb_stat lb_pvalue
10 14.185104 0.164715

Convert predictions into a data frame in order to compute the forecast error

In [1240]:
# convert to data frame
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()

prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1240]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 8877.523581 2308.238573 15446.808589
2004-03-01 8010.148141 1679.242038 14341.054243
2004-04-01 8220.446741 1916.472828 14524.420654
2004-05-01 7162.927350 859.526021 13466.328679
2004-06-01 7885.027953 1583.340640 14186.715265

Join actual values to predicted values

In [1241]:
# add back actuals --
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1241]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 8877.523581 2308.238573 15446.808589 8977
2004-03-01 8010.148141 1679.242038 14341.054243 10289
2004-04-01 8220.446741 1916.472828 14524.420654 8095
2004-05-01 7162.927350 859.526021 13466.328679 10618
2004-06-01 7885.027953 1583.340640 14186.715265 13684

Calculate the Forecast Error

In [1242]:
# calc error
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolue value of error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- mape --
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- rmse
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 14.014%
RMSE = 2,880.493

Generate the forecast for the next six months

In [1243]:
# Generate forecast for the next six months
forecast_American_Eagle = fit_AR1_American_Eagle.get_forecast(steps=6)
mean_forecast_American_Eagle = forecast_American_Eagle.predicted_mean
confidence_intervals_American_Eagle = forecast_American_Eagle.conf_int()

# Create a DataFrame with forecasted values and confidence intervals
forecast_df_American_Eagle = pd.DataFrame({
    'Forecasted_CPI': mean_forecast_American_Eagle,
    'Lower_Bound': confidence_intervals_American_Eagle.iloc[:, 0],
    'Upper_Bound': confidence_intervals_American_Eagle.iloc[:, 1]
})

forecast_df_American_Eagle
Out[1243]:
Forecasted_CPI Lower_Bound Upper_Bound
2011-01-01 10558.880550 6047.453083 15070.308016
2011-02-01 7716.470919 2537.663582 12895.278256
2011-03-01 9475.082087 3734.462212 15215.701962
2011-04-01 7834.568862 1607.491752 14061.645972
2011-05-01 8397.925840 1742.128457 15053.723223
2011-06-01 11098.022980 4059.863838 18136.182121

Plot the series and append the six month forecast. Notice that the forecast follows a pattern similar to the one observed in the actual data

In [1244]:
# Plot forecast
plt.figure(figsize=(14, 7))
bag_ts_American_Eagle.plot(label='Observed', legend=True)
mean_forecast_American_Eagle.plot(label='6-Months Forecast', legend=True)
plt.fill_between(confidence_intervals_American_Eagle.index,
                 confidence_intervals_American_Eagle.iloc[:, 0],
                 confidence_intervals_American_Eagle.iloc[:, 1], color='pink', alpha=0.3)
plt.title("American Eagle Baggage Claims Forecast")
plt.xlabel("Date")
plt.ylabel("# Baggage Claims")
plt.xticks(rotation=45)
plt.tight_layout()
plt.legend()
plt.show()

United Airlines Baggage Claims¶

In [1245]:
bag_dat_United = bag_dat[bag_dat['Airline'] == 'United']
bag_dat_United.head(5)
Out[1245]:
Airline Date Month Year Baggage Scheduled Cancelled Enplaned
168 United 01/2004 1 2004 25015 45809 1017 4434315
169 United 02/2004 2 2004 16660 42675 312 4458657
170 United 03/2004 3 2004 19318 46512 321 5302929
171 United 04/2004 4 2004 15638 45309 162 5288871
172 United 05/2004 5 2004 19302 46630 652 5408451
In [1246]:
bag_dat_sub_United = bag_dat_United.drop(columns=['Airline', 'Date', 'Month', 'Year'])


# Compute correlations
cormat = bag_dat_sub_United.corr()

# Round correlation matrix to 2 decimal places
cormat = cormat.round(2)

# Plot correlation matrix using a heatmap
plt.figure(figsize = (10,8))
sns.heatmap(cormat,annot=True, cmap = 'coolwarm')
plt.show()
In [1247]:
# Convert date column to a date class variable

# Create a monthly index for the series
bag_dat_United['Date'] = pd.to_datetime(bag_dat_United['Date'], format="%m/%Y")
bag_ts_United = pd.Series(bag_dat_United['Baggage'].values, index=bag_dat_United['Date'])

bag_ts_United.index.freq = 'MS'


#Plot the series
plt.figure(figsize=(10, 6))
bag_ts_United.plot()
plt.title("Baggage Claims for United")
plt.xlabel("Date")
plt.ylabel("# Baggage Claims")
plt.show()
<ipython-input-1247-cb68bd3cd6fe>:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bag_dat_United['Date'] = pd.to_datetime(bag_dat_United['Date'], format="%m/%Y")
In [1248]:
#execute the Ljung Box test on the Coffee series using 10 lags
Bags_United = bag_dat_United[['Baggage']]

sm.stats.acorr_ljungbox(Bags_United, lags=[10], return_df=True)
Out[1248]:
lb_stat lb_pvalue
10 160.317171 2.786746e-29
In [1249]:
#Plot the ACF and PACF for the coffee series

fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(bag_ts_United, ax=ax1_United, zero=False)
plot_pacf(bag_ts_United, ax=ax2_United, zero=False)
plt.show()
In [1250]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_United = ARIMA(bag_ts_United, order=(0,0,0), seasonal_order=(0, 1, 0, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_United = fit_ARD12_United.resid

fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_United, ax=ax1_United, zero=False)
plot_pacf(residuals12_United, ax=ax2_United, zero=False, lags=9)
plt.show()
In [1251]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(1,0,0), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(1, 0, 0)x(0, 1, [1], 12)   Log Likelihood                -706.585
Date:                            Mon, 23 Oct 2023   AIC                           1419.170
Time:                                    02:44:45   BIC                           1426.000
Sample:                                01-01-2004   HQIC                          1421.890
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.8258      0.035     23.635      0.000       0.757       0.894
ma.S.L12      -0.6496      0.116     -5.580      0.000      -0.878      -0.421
sigma2      1.813e+07   1.72e-09   1.05e+16      0.000    1.81e+07    1.81e+07
===================================================================================
Ljung-Box (L1) (Q):                   0.48   Jarque-Bera (JB):                17.36
Prob(Q):                              0.49   Prob(JB):                         0.00
Heteroskedasticity (H):               0.30   Skew:                            -0.47
Prob(H) (two-sided):                  0.00   Kurtosis:                         5.21
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 3.51e+31. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a172e3e0>
In [1252]:
# Plot ACF and PACF for residuals

fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals_United, ax=ax1_United, zero=False)
plot_pacf(residuals_United, ax=ax2_United, zero=False, lags=9)
plt.show()
In [1253]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_United, lags=[10], return_df=True)
Out[1253]:
lb_stat lb_pvalue
10 16.951913 0.075435
In [1254]:
# convert to data frame
predicted_mean_United = pred_units_United.predicted_mean
conf_int_United = pred_units_United.conf_int()

prediction_df_United = pd.DataFrame({
    'predicted_mean': predicted_mean_United,
    'lower_bound': conf_int_United.iloc[:, 0],
    'upper_bound': conf_int_United.iloc[:, 1]
})
prediction_df_United = prediction_df_United.tail(-1)
prediction_df_United.head()
Out[1254]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 19898.744014 9639.203017 30158.285011
2004-03-01 13278.758482 3019.242911 23538.274052
2004-04-01 15061.243589 4805.267095 25317.220083
2004-05-01 12035.958696 1785.483779 22286.433612
2004-06-01 14649.272687 4406.927829 24891.617545
In [1255]:
# add back actuals --
prediction_df_United = pd.concat([prediction_df_United,bag_ts_United.to_frame(name='Baggage')],axis=1)
prediction_df_United = prediction_df_United.tail(-1)
prediction_df_United.head()
Out[1255]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 19898.744014 9639.203017 30158.285011 16660
2004-03-01 13278.758482 3019.242911 23538.274052 19318
2004-04-01 15061.243589 4805.267095 25317.220083 15638
2004-05-01 12035.958696 1785.483779 22286.433612 19302
2004-06-01 14649.272687 4406.927829 24891.617545 21892
In [1256]:
# calc error
errors_United = prediction_df_United['Baggage'] - prediction_df_United['predicted_mean']
# absolue value of error
errors_abs_United = errors_United.abs()
# -- mape --
mape_United = (errors_abs_United/prediction_df_United['Baggage'].abs()).mean()
# -- rmse
rmse_United = (errors_United**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_United:.3%}')
print(f'RMSE = {rmse_United:,.3f}')
MAPE = 17.942%
RMSE = 5,462.825
In [1257]:
# Generate forecast for the next six months
forecast_United = fit_AR1_United.get_forecast(steps=6)
mean_forecast_United = forecast_United.predicted_mean
confidence_intervals_United = forecast_United.conf_int()

# Create a DataFrame with forecasted values and confidence intervals
forecast_df_United = pd.DataFrame({
    'Forecasted_CPI': mean_forecast_United,
    'Lower_Bound': confidence_intervals_United.iloc[:, 0],
    'Upper_Bound': confidence_intervals_United.iloc[:, 1]
})

forecast_df_United
Out[1257]:
Forecasted_CPI Lower_Bound Upper_Bound
2011-01-01 11517.989987 3166.755574 19869.224401
2011-02-01 8070.936387 -2757.702057 18899.574831
2011-03-01 11672.924016 -560.511295 23906.359327
2011-04-01 9262.037189 -3843.209135 22367.283512
2011-05-01 11463.779642 -2204.058916 25131.618201
2011-06-01 17903.990424 3865.490787 31942.490060
In [1258]:
# Plot forecast
plt.figure(figsize=(14, 7))
bag_ts_United.plot(label='Observed', legend=True)
mean_forecast_United.plot(label='6-Months Forecast', legend=True)
plt.fill_between(confidence_intervals_United.index,
                 confidence_intervals_United.iloc[:, 0],
                 confidence_intervals_United.iloc[:, 1], color='pink', alpha=0.3)
plt.title("United Airlines Baggage Claims Forecast")
plt.xlabel("Date")
plt.ylabel("# Baggage Claims")
plt.xticks(rotation=45)
plt.tight_layout()
plt.legend()
plt.show()

American Eagle Cancelled Flights¶

In [1259]:
# Convert date column to a date class variable

# Create a monthly index for the series
bag_dat_American_Eagle['Date'] = pd.to_datetime(bag_dat_American_Eagle['Date'], format="%m/%Y")
bag_ts_American_Eagle = pd.Series(bag_dat_American_Eagle['Cancelled'].values, index=bag_dat_American_Eagle['Date'])

bag_ts_American_Eagle.index.freq = 'MS'


#Plot the series
plt.figure(figsize=(10, 6))
bag_ts_American_Eagle.plot()
plt.title("Cancelled Flights for American Eagle Airlines")
plt.xlabel("Date")
plt.ylabel("# Cancelled FLights")
plt.show()
<ipython-input-1259-8b5711e5a063>:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bag_dat_American_Eagle['Date'] = pd.to_datetime(bag_dat_American_Eagle['Date'], format="%m/%Y")
In [1260]:
#execute the Ljung Box test on the Coffee series using 10 lags
Bags_American_Eagle = bag_dat_American_Eagle[['Cancelled']]

sm.stats.acorr_ljungbox(Bags_American_Eagle, lags=[10], return_df=True)
Out[1260]:
lb_stat lb_pvalue
10 33.222572 0.00025
In [1261]:
#Plot the ACF and PACF for the coffee series

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(bag_ts_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(bag_ts_American_Eagle, ax=ax2_American_Eagle, zero=False)
plt.show()
In [1262]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,0,0), seasonal_order=(0, 1, 0, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1263]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 1, 0, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1264]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1265]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 0, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1266]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1267]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,1), seasonal_order=(0, 1, 0, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1268]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,1), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1269]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(1, 1, 0, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1270]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(1, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1271]:
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,1), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1272]:
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1273]:
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,2), seasonal_order=(0, 1, 1, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
In [1274]:
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(0, 1, 2, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
In [1275]:
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,2), seasonal_order=(0, 1, 2, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
In [1276]:
#  which is yt - y(t-12)

fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(2, 1, 2, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_American_Eagle = fit_ARD12_American_Eagle.resid

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
In [1277]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,1), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(1, 1, 1)x(0, 1, 1, 12)   Log Likelihood                -555.651
Date:                          Mon, 23 Oct 2023   AIC                           1119.303
Time:                                  02:45:14   BIC                           1128.353
Sample:                              01-01-2004   HQIC                          1122.902
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.0710      0.184     -0.386      0.699      -0.431       0.289
ma.L1         -0.7046      0.108     -6.506      0.000      -0.917      -0.492
ma.S.L12      -0.6649      0.136     -4.889      0.000      -0.931      -0.398
sigma2      3.413e+05   5.82e+04      5.863      0.000    2.27e+05    4.55e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                 6.53
Prob(Q):                              0.92   Prob(JB):                         0.04
Heteroskedasticity (H):               0.99   Skew:                             0.42
Prob(H) (two-sided):                  0.97   Kurtosis:                         4.22
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a04fa530>
In [1278]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(1, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(1, 0, 1)x(1, 1, 1, 12)   Log Likelihood                -561.787
Date:                          Mon, 23 Oct 2023   AIC                           1133.573
Time:                                  02:45:17   BIC                           1144.957
Sample:                              01-01-2004   HQIC                          1138.105
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.9363      0.068     13.825      0.000       0.804       1.069
ma.L1         -0.7194      0.117     -6.155      0.000      -0.948      -0.490
ar.S.L12       0.0507      0.222      0.229      0.819      -0.384       0.486
ma.S.L12      -0.6921      0.197     -3.509      0.000      -1.079      -0.306
sigma2      3.277e+05   5.33e+04      6.145      0.000    2.23e+05    4.32e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.19   Jarque-Bera (JB):                 7.37
Prob(Q):                              0.66   Prob(JB):                         0.03
Heteroskedasticity (H):               0.93   Skew:                             0.40
Prob(H) (two-sided):                  0.87   Kurtosis:                         4.34
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a186dd20>
In [1279]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,1), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(2, 0, 1)x(0, 1, 1, 12)   Log Likelihood                -561.591
Date:                          Mon, 23 Oct 2023   AIC                           1133.183
Time:                                  02:45:19   BIC                           1144.566
Sample:                              01-01-2004   HQIC                          1137.715
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.8124      0.215      3.777      0.000       0.391       1.234
ar.L2          0.1112      0.176      0.630      0.529      -0.235       0.457
ma.L1         -0.6553      0.152     -4.316      0.000      -0.953      -0.358
ma.S.L12      -0.6794      0.127     -5.358      0.000      -0.928      -0.431
sigma2      3.246e+05   5.21e+04      6.225      0.000    2.22e+05    4.27e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 8.07
Prob(Q):                              0.95   Prob(JB):                         0.02
Heteroskedasticity (H):               0.95   Skew:                             0.41
Prob(H) (two-sided):                  0.90   Kurtosis:                         4.42
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3c62770>
In [1280]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,2), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(2, 0, 2)x(0, 1, [1], 12)   Log Likelihood                -559.601
Date:                            Mon, 23 Oct 2023   AIC                           1131.202
Time:                                    02:45:21   BIC                           1144.862
Sample:                                01-01-2004   HQIC                          1136.640
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.0395      0.051     -0.779      0.436      -0.139       0.060
ar.L2          0.9600      0.050     19.163      0.000       0.862       1.058
ma.L1          0.2030      0.136      1.489      0.137      -0.064       0.470
ma.L2         -0.6620      0.110     -6.021      0.000      -0.877      -0.446
ma.S.L12      -0.9766      0.159     -6.160      0.000      -1.287      -0.666
sigma2      2.577e+05    6.4e-07   4.03e+11      0.000    2.58e+05    2.58e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 3.67
Prob(Q):                              0.95   Prob(JB):                         0.16
Heteroskedasticity (H):               0.86   Skew:                             0.25
Prob(H) (two-sided):                  0.72   Kurtosis:                         3.98
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number  2e+26. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1df8b20>
In [1281]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(1, 0, 1)x(0, 1, 1, 12)   Log Likelihood                -561.808
Date:                          Mon, 23 Oct 2023   AIC                           1131.615
Time:                                  02:45:22   BIC                           1140.722
Sample:                              01-01-2004   HQIC                          1135.241
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.9370      0.061     15.312      0.000       0.817       1.057
ma.L1         -0.7181      0.116     -6.166      0.000      -0.946      -0.490
ma.S.L12      -0.6421      0.123     -5.237      0.000      -0.882      -0.402
sigma2      3.306e+05    5.2e+04      6.352      0.000    2.29e+05    4.33e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.19   Jarque-Bera (JB):                 7.47
Prob(Q):                              0.66   Prob(JB):                         0.02
Heteroskedasticity (H):               0.93   Skew:                             0.39
Prob(H) (two-sided):                  0.87   Kurtosis:                         4.38
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a19b13c0>
In [1282]:
# convert to data frame
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()

prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1282]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 246.706788 -2164.378269 2657.791844
2004-03-01 276.001192 -2127.639065 2679.641448
2004-04-01 333.640809 -2065.472371 2732.753989
2004-05-01 324.030000 -2072.441677 2720.501676
2004-06-01 431.173159 -1963.865843 2826.212161
In [1283]:
# add back actuals --
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1283]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 246.706788 -2164.378269 2657.791844 886
2004-03-01 276.001192 -2127.639065 2679.641448 1346
2004-04-01 333.640809 -2065.472371 2732.753989 755
2004-05-01 324.030000 -2072.441677 2720.501676 2206
2004-06-01 431.173159 -1963.865843 2826.212161 1580
In [1284]:
# calc error
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolue value of error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- mape --
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- rmse
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 43.820%
RMSE = 672.910
In [1285]:
# Plot ACF and PACF for residuals

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1286]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1286]:
lb_stat lb_pvalue
10 35.045521 0.000123
In [1287]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(1, 1, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                     SARIMAX Results                                     
=========================================================================================
Dep. Variable:                                 y   No. Observations:                   84
Model:             ARIMA(1, 0, 1)x(1, 1, [], 12)   Log Likelihood                -564.261
Date:                           Mon, 23 Oct 2023   AIC                           1136.521
Time:                                   02:45:23   BIC                           1145.628
Sample:                               01-01-2004   HQIC                          1140.147
                                    - 12-01-2010                                         
Covariance Type:                             opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.8843      0.110      8.036      0.000       0.669       1.100
ma.L1         -0.6247      0.177     -3.533      0.000      -0.971      -0.278
ar.S.L12      -0.4359      0.153     -2.846      0.004      -0.736      -0.136
sigma2      3.664e+05   4.84e+04      7.566      0.000    2.71e+05    4.61e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 8.14
Prob(Q):                              0.95   Prob(JB):                         0.02
Heteroskedasticity (H):               1.31   Skew:                            -0.07
Prob(H) (two-sided):                  0.51   Kurtosis:                         4.64
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a17b4910>
In [1288]:
# convert to data frame
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()

prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1288]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 307.726972 -2107.907098 2723.361042
2004-03-01 326.234049 -2079.324186 2731.792283
2004-04-01 382.482066 -2017.769151 2782.733284
2004-05-01 356.566959 -2041.033200 2754.167118
2004-06-01 480.027285 -1916.384609 2876.439179
In [1289]:
# add back actuals --
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1289]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 307.726972 -2107.907098 2723.361042 886
2004-03-01 326.234049 -2079.324186 2731.792283 1346
2004-04-01 382.482066 -2017.769151 2782.733284 755
2004-05-01 356.566959 -2041.033200 2754.167118 2206
2004-06-01 480.027285 -1916.384609 2876.439179 1580
In [1290]:
# calc error
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolue value of error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- mape --
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- rmse
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 45.101%
RMSE = 683.157
In [1291]:
# Plot ACF and PACF for residuals

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1292]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1292]:
lb_stat lb_pvalue
10 31.207754 0.000542
In [1293]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(0, 1, [1], 12)   Log Likelihood                -563.592
Date:                            Mon, 23 Oct 2023   AIC                           1135.183
Time:                                    02:45:25   BIC                           1144.290
Sample:                                01-01-2004   HQIC                          1138.809
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.2581      0.125      2.056      0.040       0.012       0.504
ar.L2          0.3334      0.116      2.863      0.004       0.105       0.562
ma.S.L12      -0.6334      0.117     -5.428      0.000      -0.862      -0.405
sigma2      3.445e+05   4.51e+04      7.645      0.000    2.56e+05    4.33e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.05   Jarque-Bera (JB):                12.76
Prob(Q):                              0.83   Prob(JB):                         0.00
Heteroskedasticity (H):               1.20   Skew:                             0.57
Prob(H) (two-sided):                  0.66   Kurtosis:                         4.72
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1968880>
In [1294]:
# convert to data frame
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()

prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1294]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 363.258763 -2112.590077 2839.107603
2004-03-01 471.404179 -1978.164028 2920.972386
2004-04-01 399.504162 -2047.351014 2846.359339
2004-05-01 388.398615 -2057.173215 2833.970445
2004-06-01 462.663200 -1982.834174 2908.160574
In [1295]:
# add back actuals --
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1295]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 363.258763 -2112.590077 2839.107603 886
2004-03-01 471.404179 -1978.164028 2920.972386 1346
2004-04-01 399.504162 -2047.351014 2846.359339 755
2004-05-01 388.398615 -2057.173215 2833.970445 2206
2004-06-01 462.663200 -1982.834174 2908.160574 1580
In [1296]:
# calc error
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolue value of error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- mape --
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- rmse
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 45.927%
RMSE = 680.648
In [1297]:
# Plot ACF and PACF for residuals

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1298]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1298]:
lb_stat lb_pvalue
10 36.958414 0.000058
In [1299]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                     SARIMAX Results                                     
=========================================================================================
Dep. Variable:                                 y   No. Observations:                   84
Model:             ARIMA(1, 0, 1)x(0, 1, [], 12)   Log Likelihood                -571.372
Date:                           Mon, 23 Oct 2023   AIC                           1148.744
Time:                                   02:45:26   BIC                           1155.574
Sample:                               01-01-2004   HQIC                          1151.463
                                    - 12-01-2010                                         
Covariance Type:                             opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.7900      0.168      4.708      0.000       0.461       1.119
ma.L1         -0.5219      0.247     -2.111      0.035      -1.007      -0.037
sigma2      4.568e+05   6.18e+04      7.388      0.000    3.36e+05    5.78e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 3.31
Prob(Q):                              1.00   Prob(JB):                         0.19
Heteroskedasticity (H):               1.42   Skew:                            -0.07
Prob(H) (two-sided):                  0.39   Kurtosis:                         4.04
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1219570>
In [1300]:
# convert to data frame
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()

prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1300]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 307.528647 -2109.155084 2724.212377
2004-03-01 308.630126 -2099.544345 2716.804596
2004-04-01 356.645595 -2047.597941 2760.889130
2004-05-01 324.181183 -2078.230115 2726.592481
2004-06-01 454.625386 -1946.928338 2856.179110
In [1301]:
# add back actuals --
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1301]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 307.528647 -2109.155084 2724.212377 886
2004-03-01 308.630126 -2099.544345 2716.804596 1346
2004-04-01 356.645595 -2047.597941 2760.889130 755
2004-05-01 324.181183 -2078.230115 2726.592481 2206
2004-06-01 454.625386 -1946.928338 2856.179110 1580
In [1302]:
# calc error
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolue value of error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- mape --
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- rmse
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 49.141%
RMSE = 737.088
In [1303]:
# Plot ACF and PACF for residuals

fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
In [1304]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1304]:
lb_stat lb_pvalue
10 22.779727 0.01159
In [1305]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(0, 1, 2, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
                                       SARIMAX Results                                       
=============================================================================================
Dep. Variable:                                     y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(0, 1, [1, 2], 12)   Log Likelihood                -563.257
Date:                               Mon, 23 Oct 2023   AIC                           1136.515
Time:                                       02:45:29   BIC                           1147.898
Sample:                                   01-01-2004   HQIC                          1141.047
                                        - 12-01-2010                                         
Covariance Type:                                 opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.2246      0.124      1.811      0.070      -0.018       0.468
ar.L2          0.3388      0.120      2.822      0.005       0.103       0.574
ma.S.L12      -0.6673      0.205     -3.256      0.001      -1.069      -0.266
ma.S.L24      -0.1305      0.167     -0.782      0.434      -0.458       0.197
sigma2      3.223e+05   6.19e+04      5.204      0.000    2.01e+05    4.44e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.02   Jarque-Bera (JB):                12.55
Prob(Q):                              0.88   Prob(JB):                         0.00
Heteroskedasticity (H):               1.18   Skew:                             0.63
Prob(H) (two-sided):                  0.68   Kurtosis:                         4.61
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1d61d80>
In [1306]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,2), seasonal_order=(0, 1, 2, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(2, 0, 2)x(0, 1, 2, 12)   Log Likelihood                -559.518
Date:                          Mon, 23 Oct 2023   AIC                           1133.035
Time:                                  02:45:33   BIC                           1148.972
Sample:                              01-01-2004   HQIC                          1139.380
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.0446      0.063     -0.707      0.480      -0.168       0.079
ar.L2          0.9552      0.062     15.419      0.000       0.834       1.077
ma.L1          0.2078      0.143      1.455      0.146      -0.072       0.488
ma.L2         -0.6578      0.115     -5.723      0.000      -0.883      -0.433
ma.S.L12      -0.9370      0.192     -4.871      0.000      -1.314      -0.560
ma.S.L24      -0.0518      0.151     -0.342      0.732      -0.349       0.245
sigma2      2.581e+05   7.33e-07   3.52e+11      0.000    2.58e+05    2.58e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                 3.74
Prob(Q):                              0.93   Prob(JB):                         0.15
Heteroskedasticity (H):               0.88   Skew:                             0.25
Prob(H) (two-sided):                  0.75   Kurtosis:                         4.00
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 8.55e+26. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a164a800>
In [1307]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(2, 1, 2, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                       SARIMAX Results                                       
=============================================================================================
Dep. Variable:                                     y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(2, 1, [1, 2], 12)   Log Likelihood                -563.121
Date:                               Mon, 23 Oct 2023   AIC                           1140.243
Time:                                       02:45:36   BIC                           1156.179
Sample:                                   01-01-2004   HQIC                          1146.587
                                        - 12-01-2010                                         
Covariance Type:                                 opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.2206      0.127      1.740      0.082      -0.028       0.469
ar.L2          0.3291      0.120      2.732      0.006       0.093       0.565
ar.S.L12       1.2299      2.256      0.545      0.586      -3.191       5.651
ar.S.L24      -0.2305      0.993     -0.232      0.816      -2.176       1.715
ma.S.L12      -1.9795      2.125     -0.932      0.351      -6.143       2.185
ma.S.L24       0.9796      2.492      0.393      0.694      -3.904       5.864
sigma2      2.905e+05   2.26e-05   1.29e+10      0.000    2.91e+05    2.91e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.02   Jarque-Bera (JB):                11.82
Prob(Q):                              0.90   Prob(JB):                         0.00
Heteroskedasticity (H):               1.20   Skew:                             0.62
Prob(H) (two-sided):                  0.65   Kurtosis:                         4.56
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 7.85e+26. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a186ddb0>
In [1308]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(0, 1, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(0, 1, 0, 12)   Log Likelihood                -571.852
Date:                          Mon, 23 Oct 2023   AIC                           1149.705
Time:                                  02:45:37   BIC                           1156.535
Sample:                              01-01-2004   HQIC                          1152.424
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.3001      0.125      2.393      0.017       0.054       0.546
ar.L2          0.1788      0.116      1.544      0.123      -0.048       0.406
sigma2      4.746e+05   6.88e+04      6.895      0.000     3.4e+05     6.1e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.03   Jarque-Bera (JB):                 2.41
Prob(Q):                              0.86   Prob(JB):                         0.30
Heteroskedasticity (H):               1.55   Skew:                             0.02
Prob(H) (two-sided):                  0.29   Kurtosis:                         3.90
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a12e63b0>
In [1309]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,0,0), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                          ARIMA   Log Likelihood                -671.044
Date:                Mon, 23 Oct 2023   AIC                           1346.087
Time:                        02:45:37   BIC                           1350.949
Sample:                    01-01-2004   HQIC                          1348.042
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       1414.7620     89.626     15.785      0.000    1239.099    1590.425
sigma2      5.088e+05    8.3e+04      6.133      0.000    3.46e+05    6.71e+05
===================================================================================
Ljung-Box (L1) (Q):                  10.40   Jarque-Bera (JB):                 8.67
Prob(Q):                              0.00   Prob(JB):                         0.01
Heteroskedasticity (H):               2.08   Skew:                             0.76
Prob(H) (two-sided):                  0.06   Kurtosis:                         3.38
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3dc4970>
In [1310]:
# convert to data frame
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()

prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1310]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 1414.761951 16.710898 2812.813005
2004-03-01 1414.761951 16.710898 2812.813005
2004-04-01 1414.761951 16.710898 2812.813005
2004-05-01 1414.761951 16.710898 2812.813005
2004-06-01 1414.761951 16.710898 2812.813005
In [1311]:
# add back actuals --
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1311]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 1414.761951 16.710898 2812.813005 886
2004-03-01 1414.761951 16.710898 2812.813005 1346
2004-04-01 1414.761951 16.710898 2812.813005 755
2004-05-01 1414.761951 16.710898 2812.813005 2206
2004-06-01 1414.761951 16.710898 2812.813005 1580
In [1312]:
# calc error
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolue value of error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- mape --
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- rmse
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 60.603%
RMSE = 707.812
In [1313]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1313]:
lb_stat lb_pvalue
10 33.222572 0.00025
In [1314]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,0,0), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                SARIMAX Results                                 
================================================================================
Dep. Variable:                        y   No. Observations:                   84
Model:             ARIMA(0, 1, [1], 12)   Log Likelihood                -572.764
Date:                  Mon, 23 Oct 2023   AIC                           1149.529
Time:                          02:45:38   BIC                           1154.082
Sample:                      01-01-2004   HQIC                          1151.341
                           - 12-01-2010                                         
Covariance Type:                    opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ma.S.L12      -0.5074      0.089     -5.690      0.000      -0.682      -0.333
sigma2       4.51e+05   7.15e+04      6.307      0.000    3.11e+05    5.91e+05
===================================================================================
Ljung-Box (L1) (Q):                  11.31   Jarque-Bera (JB):                 1.20
Prob(Q):                              0.00   Prob(JB):                         0.55
Heteroskedasticity (H):               1.61   Skew:                             0.16
Prob(H) (two-sided):                  0.25   Kurtosis:                         3.55
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1d6a8f0>
In [1315]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1315]:
lb_stat lb_pvalue
10 156.734109 1.528942e-28
In [1316]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(1, 0, 0)   Log Likelihood                -665.563
Date:                Mon, 23 Oct 2023   AIC                           1337.126
Time:                        02:45:38   BIC                           1344.419
Sample:                    01-01-2004   HQIC                          1340.058
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       1414.7619    133.929     10.564      0.000    1152.266    1677.258
ar.L1          0.3539      0.102      3.481      0.001       0.155       0.553
sigma2       4.39e+05   6.56e+04      6.694      0.000     3.1e+05    5.67e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.15   Jarque-Bera (JB):                24.14
Prob(Q):                              0.70   Prob(JB):                         0.00
Heteroskedasticity (H):               1.49   Skew:                             1.08
Prob(H) (two-sided):                  0.29   Kurtosis:                         4.50
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1e5e140>
In [1317]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1317]:
lb_stat lb_pvalue
10 17.732891 0.05964
In [1318]:
# convert to data frame
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()

prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1318]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 1792.100807 493.539834 3090.661780
2004-03-01 1227.634474 -70.926499 2526.195447
2004-04-01 1390.427272 91.866299 2688.988246
2004-05-01 1181.273917 -117.287057 2479.834890
2004-06-01 1694.779025 396.218052 2993.339999
In [1319]:
# add back actuals --
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1319]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 1792.100807 493.539834 3090.661780 886
2004-03-01 1227.634474 -70.926499 2526.195447 1346
2004-04-01 1390.427272 91.866299 2688.988246 755
2004-05-01 1181.273917 -117.287057 2479.834890 2206
2004-06-01 1694.779025 396.218052 2993.339999 1580
In [1320]:
# calc error
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolue value of error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- mape --
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- rmse
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 51.240%
RMSE = 662.554
In [1321]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(2, 0, 0)   Log Likelihood                -665.131
Date:                Mon, 23 Oct 2023   AIC                           1338.262
Time:                        02:45:39   BIC                           1347.985
Sample:                    01-01-2004   HQIC                          1342.170
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       1416.6280    148.803      9.520      0.000    1124.979    1708.277
ar.L1          0.3129      0.136      2.296      0.022       0.046       0.580
ar.L2          0.1047      0.136      0.770      0.441      -0.162       0.371
sigma2      4.414e+05   6.66e+04      6.629      0.000    3.11e+05    5.72e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                26.32
Prob(Q):                              0.93   Prob(JB):                         0.00
Heteroskedasticity (H):               1.53   Skew:                             1.09
Prob(H) (two-sided):                  0.27   Kurtosis:                         4.66
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1c7f910>
In [1322]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(1, 0, 1)   Log Likelihood                -665.305
Date:                Mon, 23 Oct 2023   AIC                           1338.610
Time:                        02:45:39   BIC                           1348.334
Sample:                    01-01-2004   HQIC                          1342.519
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       1414.7625    144.605      9.784      0.000    1131.342    1698.183
ar.L1          0.5163      0.321      1.610      0.107      -0.112       1.145
ma.L1         -0.1870      0.419     -0.446      0.656      -1.009       0.635
sigma2      4.384e+05   6.57e+04      6.675      0.000     3.1e+05    5.67e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                25.95
Prob(Q):                              0.91   Prob(JB):                         0.00
Heteroskedasticity (H):               1.50   Skew:                             1.10
Prob(H) (two-sided):                  0.29   Kurtosis:                         4.62
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1e5d270>
In [1323]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 1, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(1, 0, 0)x(0, 1, 0, 12)   Log Likelihood                -573.055
Date:                          Mon, 23 Oct 2023   AIC                           1150.111
Time:                                  02:45:40   BIC                           1154.664
Sample:                              01-01-2004   HQIC                          1151.923
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.3692      0.100      3.686      0.000       0.173       0.565
sigma2      4.832e+05    6.6e+04      7.326      0.000    3.54e+05    6.13e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.48   Jarque-Bera (JB):                 3.46
Prob(Q):                              0.49   Prob(JB):                         0.18
Heteroskedasticity (H):               1.35   Skew:                            -0.17
Prob(H) (two-sided):                  0.46   Kurtosis:                         4.02
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1dc4bb0>
In [1324]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1324]:
lb_stat lb_pvalue
10 44.687572 0.000002
In [1325]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,0), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(1, 1, 0)   Log Likelihood                -666.295
Date:                Mon, 23 Oct 2023   AIC                           1336.589
Time:                        02:45:40   BIC                           1341.427
Sample:                    01-01-2004   HQIC                          1338.533
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.4132      0.126     -3.275      0.001      -0.660      -0.166
sigma2      5.398e+05   5.47e+04      9.874      0.000    4.33e+05    6.47e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                42.43
Prob(Q):                              0.92   Prob(JB):                         0.00
Heteroskedasticity (H):               1.56   Skew:                             1.09
Prob(H) (two-sided):                  0.24   Kurtosis:                         5.74
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a5ebdf00>
In [1326]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1326]:
lb_stat lb_pvalue
10 30.259919 0.000777
In [1327]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,0,1), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(0, 0, 1)   Log Likelihood                -667.052
Date:                Mon, 23 Oct 2023   AIC                           1340.104
Time:                        02:45:41   BIC                           1347.396
Sample:                    01-01-2004   HQIC                          1343.035
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       1414.7619    111.943     12.638      0.000    1195.358    1634.165
ma.L1          0.2620      0.122      2.153      0.031       0.024       0.501
sigma2      4.566e+05   7.03e+04      6.498      0.000    3.19e+05    5.94e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.31   Jarque-Bera (JB):                17.33
Prob(Q):                              0.58   Prob(JB):                         0.00
Heteroskedasticity (H):               1.61   Skew:                             0.98
Prob(H) (two-sided):                  0.21   Kurtosis:                         4.04
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3be7190>
In [1328]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1328]:
lb_stat lb_pvalue
10 20.834496 0.022278
In [1329]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,1,1), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(0, 1, 1)   Log Likelihood                -659.069
Date:                Mon, 23 Oct 2023   AIC                           1322.138
Time:                        02:45:41   BIC                           1326.976
Sample:                    01-01-2004   HQIC                          1324.082
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ma.L1         -0.8355      0.055    -15.128      0.000      -0.944      -0.727
sigma2      4.616e+05   7.26e+04      6.358      0.000    3.19e+05    6.04e+05
===================================================================================
Ljung-Box (L1) (Q):                   2.27   Jarque-Bera (JB):                11.66
Prob(Q):                              0.13   Prob(JB):                         0.00
Heteroskedasticity (H):               1.70   Skew:                             0.83
Prob(H) (two-sided):                  0.16   Kurtosis:                         3.77
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3b64e20>
In [1330]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1330]:
lb_stat lb_pvalue
10 19.83774 0.030825
In [1331]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,1), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(1, 1, 1)   Log Likelihood                -657.734
Date:                Mon, 23 Oct 2023   AIC                           1321.467
Time:                        02:45:42   BIC                           1328.724
Sample:                    01-01-2004   HQIC                          1324.383
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.2141      0.143      1.498      0.134      -0.066       0.494
ma.L1         -0.8965      0.064    -14.005      0.000      -1.022      -0.771
sigma2      4.459e+05   6.74e+04      6.621      0.000    3.14e+05    5.78e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                19.18
Prob(Q):                              0.95   Prob(JB):                         0.00
Heteroskedasticity (H):               1.60   Skew:                             1.00
Prob(H) (two-sided):                  0.22   Kurtosis:                         4.25
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a19b3700>
In [1332]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,0,2), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())

# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(0, 0, 2)   Log Likelihood                -664.233
Date:                Mon, 23 Oct 2023   AIC                           1336.466
Time:                        02:45:42   BIC                           1346.189
Sample:                    01-01-2004   HQIC                          1340.374
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       1414.7620    141.485      9.999      0.000    1137.457    1692.067
ma.L1          0.3213      0.144      2.232      0.026       0.039       0.603
ma.L2          0.2756      0.116      2.380      0.017       0.049       0.503
sigma2      4.489e+05   6.67e+04      6.732      0.000    3.18e+05     5.8e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                23.64
Prob(Q):                              0.96   Prob(JB):                         0.00
Heteroskedasticity (H):               1.73   Skew:                             1.02
Prob(H) (two-sided):                  0.15   Kurtosis:                         4.60
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1a45690>
In [1333]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
Out[1333]:
lb_stat lb_pvalue
10 13.370587 0.203683
In [1334]:
# convert to data frame
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()

prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1334]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 1785.383897 448.340700 3122.427094
2004-03-01 1391.475226 65.500190 2717.450262
2004-04-01 1161.118923 -152.985961 2475.223807
2004-05-01 1272.025490 -42.035474 2586.086454
2004-06-01 1602.731357 289.435354 2916.027361
In [1335]:
# add back actuals --
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
Out[1335]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 1785.383897 448.340700 3122.427094 886
2004-03-01 1391.475226 65.500190 2717.450262 1346
2004-04-01 1161.118923 -152.985961 2475.223807 755
2004-05-01 1272.025490 -42.035474 2586.086454 2206
2004-06-01 1602.731357 289.435354 2916.027361 1580
In [1336]:
# calc error
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolue value of error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- mape --
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- rmse
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 50.871%
RMSE = 651.822
In [1337]:
# Generate forecast for the next six months
forecast_American_Eagle = fit_AR1_American_Eagle.get_forecast(steps=6)
mean_forecast_American_Eagle = forecast_American_Eagle.predicted_mean
confidence_intervals_American_Eagle = forecast_American_Eagle.conf_int()

# Create a DataFrame with forecasted values and confidence intervals
forecast_df_American_Eagle = pd.DataFrame({
    'Forecasted_CPI': mean_forecast_American_Eagle,
    'Lower_Bound': confidence_intervals_American_Eagle.iloc[:, 0],
    'Upper_Bound': confidence_intervals_American_Eagle.iloc[:, 1]
})

forecast_df_American_Eagle
Out[1337]:
Forecasted_CPI Lower_Bound Upper_Bound
2011-01-01 1514.979671 201.835725 2828.123617
2011-02-01 1688.808716 309.536083 3068.081349
2011-03-01 1414.761958 -11.198800 2840.722716
2011-04-01 1414.761958 -11.198800 2840.722716
2011-05-01 1414.761958 -11.198800 2840.722716
2011-06-01 1414.761958 -11.198800 2840.722716
In [1338]:
# Plot forecast
plt.figure(figsize=(14, 7))
bag_ts_American_Eagle.plot(label='Observed', legend=True)
mean_forecast_American_Eagle.plot(label='6-Months Forecast', legend=True)
plt.fill_between(confidence_intervals_American_Eagle.index,
                 confidence_intervals_American_Eagle.iloc[:, 0],
                 confidence_intervals_American_Eagle.iloc[:, 1], color='pink', alpha=0.3)
plt.title("American Eagle Cancelled Flights Forecast")
plt.xlabel("Date")
plt.ylabel("# Cancelled Flights")
plt.xticks(rotation=45)
plt.tight_layout()
plt.legend()
plt.show()

United Airlines Cancelled Flights¶

In [1339]:
# Convert date column to a date class variable

# Create a monthly index for the series
bag_dat_United['Date'] = pd.to_datetime(bag_dat_United['Date'], format="%m/%Y")
bag_ts_United = pd.Series(bag_dat_United['Cancelled'].values, index=bag_dat_United['Date'])

bag_ts_United.index.freq = 'MS'


#Plot the series
plt.figure(figsize=(10, 6))
bag_ts_United.plot()
plt.title("Cancelled Flights for United Airlines")
plt.xlabel("Date")
plt.ylabel("# Cancelled FLights")
plt.show()
<ipython-input-1339-d65356821bcf>:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bag_dat_United['Date'] = pd.to_datetime(bag_dat_United['Date'], format="%m/%Y")
In [1340]:
#execute the Ljung Box test on the Coffee series using 10 lags
Bags_United = bag_dat_United[['Cancelled']]

sm.stats.acorr_ljungbox(Bags_United, lags=[10], return_df=True)
Out[1340]:
lb_stat lb_pvalue
10 37.135304 0.000054
In [1341]:
#Plot the ACF and PACF for the coffee series

fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(bag_ts_United, ax=ax1_United, zero=False)
plot_pacf(bag_ts_United, ax=ax2_United, zero=False)
plt.show()
In [1342]:
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
#  which is yt - y(t-12)

fit_ARD12_United = ARIMA(bag_ts_United, order=(0,0,0), seasonal_order=(0, 1, 0, 12)).fit()

# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data

residuals12_United = fit_ARD12_United.resid

fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_United, ax=ax1_United, zero=False)
plot_pacf(residuals12_United, ax=ax2_United, zero=False, lags=9)
plt.show()
In [1343]:
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(1,0,0), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(1, 0, 0)x(0, 1, [1], 12)   Log Likelihood                -535.276
Date:                            Mon, 23 Oct 2023   AIC                           1076.553
Time:                                    02:45:47   BIC                           1083.383
Sample:                                01-01-2004   HQIC                          1079.272
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.3382      0.109      3.103      0.002       0.125       0.552
ma.S.L12      -0.5301      0.095     -5.555      0.000      -0.717      -0.343
sigma2      1.589e+05   1.78e+04      8.920      0.000    1.24e+05    1.94e+05
===================================================================================
Ljung-Box (L1) (Q):                   1.29   Jarque-Bera (JB):                34.35
Prob(Q):                              0.26   Prob(JB):                         0.00
Heteroskedasticity (H):               0.78   Skew:                             1.07
Prob(H) (two-sided):                  0.54   Kurtosis:                         5.62
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a161c6d0>
In [1344]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_United, lags=[10], return_df=True)
Out[1344]:
lb_stat lb_pvalue
10 46.873687 9.955560e-07
In [1345]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(2, 0, 0)   Log Likelihood                -620.147
Date:                Mon, 23 Oct 2023   AIC                           1248.295
Time:                        02:45:47   BIC                           1258.018
Sample:                    01-01-2004   HQIC                          1252.203
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        678.6309    109.281      6.210      0.000     464.445     892.817
ar.L1          0.2078      0.115      1.804      0.071      -0.018       0.434
ar.L2          0.2657      0.099      2.675      0.007       0.071       0.460
sigma2       1.51e+05   1.74e+04      8.678      0.000    1.17e+05    1.85e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.15   Jarque-Bera (JB):               122.47
Prob(Q):                              0.70   Prob(JB):                         0.00
Heteroskedasticity (H):               1.38   Skew:                             1.75
Prob(H) (two-sided):                  0.40   Kurtosis:                         7.77
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3dc7880>
In [1346]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 1, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(0, 1, 0, 12)   Log Likelihood                -532.970
Date:                          Mon, 23 Oct 2023   AIC                           1071.939
Time:                                  02:45:47   BIC                           1078.769
Sample:                              01-01-2004   HQIC                          1074.658
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.1488      0.113      1.311      0.190      -0.074       0.371
ar.L2          0.3652      0.105      3.484      0.000       0.160       0.571
sigma2      1.608e+05   2.24e+04      7.177      0.000    1.17e+05    2.05e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                11.93
Prob(Q):                              0.97   Prob(JB):                         0.00
Heteroskedasticity (H):               0.85   Skew:                             0.70
Prob(H) (two-sided):                  0.68   Kurtosis:                         4.42
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a210c5e0>
In [1347]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,1), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(2, 0, 1)x(0, 1, 1, 12)   Log Likelihood                -526.405
Date:                          Mon, 23 Oct 2023   AIC                           1062.811
Time:                                  02:45:49   BIC                           1074.194
Sample:                              01-01-2004   HQIC                          1067.343
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.7365      0.148      4.979      0.000       0.447       1.026
ar.L2          0.2152      0.111      1.937      0.053      -0.003       0.433
ma.L1         -0.6527      0.159     -4.107      0.000      -0.964      -0.341
ma.S.L12      -0.7401      0.168     -4.403      0.000      -1.070      -0.411
sigma2      1.178e+05   1.66e+04      7.079      0.000    8.52e+04     1.5e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                25.25
Prob(Q):                              0.92   Prob(JB):                         0.00
Heteroskedasticity (H):               0.65   Skew:                             1.05
Prob(H) (two-sided):                  0.29   Kurtosis:                         5.01
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a17cc550>
In [1348]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,2), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(2, 0, 2)x(0, 1, [1], 12)   Log Likelihood                -525.136
Date:                            Mon, 23 Oct 2023   AIC                           1062.271
Time:                                    02:45:51   BIC                           1075.931
Sample:                                01-01-2004   HQIC                          1067.709
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0805      0.192      0.419      0.675      -0.296       0.457
ar.L2          0.8590      0.195      4.416      0.000       0.478       1.240
ma.L1          0.0375      0.235      0.160      0.873      -0.423       0.498
ma.L2         -0.5380      0.204     -2.632      0.008      -0.939      -0.137
ma.S.L12      -0.7715      0.211     -3.665      0.000      -1.184      -0.359
sigma2      1.124e+05   1.78e+04      6.309      0.000    7.75e+04    1.47e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                26.39
Prob(Q):                              0.97   Prob(JB):                         0.00
Heteroskedasticity (H):               0.67   Skew:                             1.10
Prob(H) (two-sided):                  0.33   Kurtosis:                         5.00
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1790880>
In [1349]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 1, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(0, 1, 0, 12)   Log Likelihood                -532.970
Date:                          Mon, 23 Oct 2023   AIC                           1071.939
Time:                                  02:45:51   BIC                           1078.769
Sample:                              01-01-2004   HQIC                          1074.658
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.1488      0.113      1.311      0.190      -0.074       0.371
ar.L2          0.3652      0.105      3.484      0.000       0.160       0.571
sigma2      1.608e+05   2.24e+04      7.177      0.000    1.17e+05    2.05e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                11.93
Prob(Q):                              0.97   Prob(JB):                         0.00
Heteroskedasticity (H):               0.85   Skew:                             0.70
Prob(H) (two-sided):                  0.68   Kurtosis:                         4.42
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a12909a0>
In [1350]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(1, 1, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                    SARIMAX Results                                     
========================================================================================
Dep. Variable:                                y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(1, 1, 0, 12)   Log Likelihood                -530.939
Date:                          Mon, 23 Oct 2023   AIC                           1069.878
Time:                                  02:45:52   BIC                           1078.984
Sample:                              01-01-2004   HQIC                          1073.503
                                   - 12-01-2010                                         
Covariance Type:                            opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.1751      0.107      1.635      0.102      -0.035       0.385
ar.L2          0.3933      0.108      3.640      0.000       0.182       0.605
ar.S.L12      -0.2478      0.146     -1.701      0.089      -0.533       0.038
sigma2      1.473e+05   2.15e+04      6.865      0.000    1.05e+05    1.89e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                11.03
Prob(Q):                              0.92   Prob(JB):                         0.00
Heteroskedasticity (H):               0.81   Skew:                             0.69
Prob(H) (two-sided):                  0.60   Kurtosis:                         4.33
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1d604f0>
In [1351]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(1, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(1, 1, [1], 12)   Log Likelihood                -526.380
Date:                            Mon, 23 Oct 2023   AIC                           1062.759
Time:                                    02:45:53   BIC                           1074.142
Sample:                                01-01-2004   HQIC                          1067.291
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.1778      0.099      1.796      0.072      -0.016       0.372
ar.L2          0.4029      0.091      4.405      0.000       0.224       0.582
ar.S.L12       0.3386      0.134      2.526      0.012       0.076       0.601
ma.S.L12      -0.9990      0.135     -7.376      0.000      -1.264      -0.734
sigma2      1.047e+05    1.3e-06   8.05e+10      0.000    1.05e+05    1.05e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                18.94
Prob(Q):                              0.91   Prob(JB):                         0.00
Heteroskedasticity (H):               0.75   Skew:                             1.02
Prob(H) (two-sided):                  0.48   Kurtosis:                         4.47
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 6.89e+25. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1dc64d0>
In [1352]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(1,0,0), seasonal_order=(0, 0, 0, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                   84
Model:                 ARIMA(1, 0, 0)   Log Likelihood                -623.153
Date:                Mon, 23 Oct 2023   AIC                           1252.306
Time:                        02:45:54   BIC                           1259.598
Sample:                    01-01-2004   HQIC                          1255.237
                         - 12-01-2010                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        678.6314     90.161      7.527      0.000     501.920     855.343
ar.L1          0.2873      0.122      2.359      0.018       0.049       0.526
sigma2      1.631e+05   2.06e+04      7.904      0.000    1.23e+05    2.04e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.48   Jarque-Bera (JB):               135.65
Prob(Q):                              0.49   Prob(JB):                         0.00
Heteroskedasticity (H):               1.58   Skew:                             1.90
Prob(H) (two-sided):                  0.23   Kurtosis:                         7.93
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3d36230>
In [1353]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_United, lags=[10], return_df=True)
Out[1353]:
lb_stat lb_pvalue
10 22.87172 0.01123
In [1354]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(1,0,2), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(1, 0, 2)x(0, 1, [1], 12)   Log Likelihood                -526.740
Date:                            Mon, 23 Oct 2023   AIC                           1063.480
Time:                                    02:45:55   BIC                           1074.863
Sample:                                01-01-2004   HQIC                          1068.012
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.9765      0.071     13.849      0.000       0.838       1.115
ma.L1         -0.8724      0.134     -6.530      0.000      -1.134      -0.611
ma.L2          0.1370      0.099      1.390      0.165      -0.056       0.330
ma.S.L12      -0.9949      7.869     -0.126      0.899     -16.417      14.428
sigma2      9.958e+04   7.72e+05      0.129      0.897   -1.41e+06    1.61e+06
===================================================================================
Ljung-Box (L1) (Q):                   0.02   Jarque-Bera (JB):                31.94
Prob(Q):                              0.87   Prob(JB):                         0.00
Heteroskedasticity (H):               0.66   Skew:                             1.16
Prob(H) (two-sided):                  0.32   Kurtosis:                         5.29
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1790520>
In [1355]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 1, 2, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                       SARIMAX Results                                       
=============================================================================================
Dep. Variable:                                     y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(0, 1, [1, 2], 12)   Log Likelihood                -525.780
Date:                               Mon, 23 Oct 2023   AIC                           1061.559
Time:                                       02:45:58   BIC                           1072.942
Sample:                                   01-01-2004   HQIC                          1066.091
                                        - 12-01-2010                                         
Covariance Type:                                 opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.1717      0.106      1.617      0.106      -0.036       0.380
ar.L2          0.3986      0.095      4.178      0.000       0.212       0.586
ma.S.L12      -0.6314      0.182     -3.466      0.001      -0.988      -0.274
ma.S.L24      -0.3676      0.148     -2.492      0.013      -0.657      -0.078
sigma2      9.985e+04   1.66e-06   6.02e+10      0.000    9.99e+04    9.99e+04
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                17.92
Prob(Q):                              0.96   Prob(JB):                         0.00
Heteroskedasticity (H):               0.79   Skew:                             0.99
Prob(H) (two-sided):                  0.56   Kurtosis:                         4.44
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 1.06e+26. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a0420160>
In [1356]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,1), seasonal_order=(0, 1, 2, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                       SARIMAX Results                                       
=============================================================================================
Dep. Variable:                                     y   No. Observations:                   84
Model:             ARIMA(2, 0, 1)x(0, 1, [1, 2], 12)   Log Likelihood                -524.684
Date:                               Mon, 23 Oct 2023   AIC                           1061.368
Time:                                       02:46:03   BIC                           1075.028
Sample:                                   01-01-2004   HQIC                          1066.806
                                        - 12-01-2010                                         
Covariance Type:                                 opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.7123      0.172      4.139      0.000       0.375       1.050
ar.L2          0.2166      0.126      1.717      0.086      -0.031       0.464
ma.L1         -0.6267      0.180     -3.483      0.000      -0.979      -0.274
ma.S.L12      -0.6987      8.378     -0.083      0.934     -17.119      15.722
ma.S.L24      -0.2947      2.546     -0.116      0.908      -5.284       4.695
sigma2      9.798e+04   8.13e+05      0.121      0.904    -1.5e+06    1.69e+06
===================================================================================
Ljung-Box (L1) (Q):                   0.03   Jarque-Bera (JB):                24.39
Prob(Q):                              0.87   Prob(JB):                         0.00
Heteroskedasticity (H):               0.72   Skew:                             1.08
Prob(H) (two-sided):                  0.43   Kurtosis:                         4.86
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3d500a0>
In [1357]:
#Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data

#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 1, 1, 12)).fit()


# Summary of the ARIMA model
print(fit_AR1_United.summary())

# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()

#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
print(pred_units_United)
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:                                  y   No. Observations:                   84
Model:             ARIMA(2, 0, 0)x(0, 1, [1], 12)   Log Likelihood                -528.251
Date:                            Mon, 23 Oct 2023   AIC                           1064.501
Time:                                    02:46:03   BIC                           1073.608
Sample:                                01-01-2004   HQIC                          1068.127
                                     - 12-01-2010                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.2038      0.091      2.246      0.025       0.026       0.382
ar.L2          0.4335      0.086      5.030      0.000       0.265       0.602
ma.S.L12      -0.6021      0.110     -5.452      0.000      -0.819      -0.386
sigma2      1.287e+05   1.78e+04      7.243      0.000    9.39e+04    1.63e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.05   Jarque-Bera (JB):                14.42
Prob(Q):                              0.82   Prob(JB):                         0.00
Heteroskedasticity (H):               0.74   Skew:                             0.90
Prob(H) (two-sided):                  0.47   Kurtosis:                         4.25
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a164a050>
In [1358]:
#execute the Ljung Box test on the residuals

sm.stats.acorr_ljungbox(residuals_United, lags=[10], return_df=True)
Out[1358]:
lb_stat lb_pvalue
10 14.645233 0.145541
In [1359]:
# convert to data frame
predicted_mean_United = pred_units_United.predicted_mean
conf_int_United = pred_units_United.conf_int()

prediction_df_United = pd.DataFrame({
    'predicted_mean': predicted_mean_United,
    'lower_bound': conf_int_United.iloc[:, 0],
    'upper_bound': conf_int_United.iloc[:, 1]
})
prediction_df_United = prediction_df_United.tail(-1)
prediction_df_United.head()
Out[1359]:
predicted_mean lower_bound upper_bound
Date
2004-02-01 67.689413 -2111.295560 2246.674386
2004-03-01 111.617093 -2058.288275 2281.522460
2004-04-01 80.478285 -2088.197694 2249.154264
2004-05-01 80.234820 -2087.262471 2247.732112
2004-06-01 82.668750 -2084.667914 2250.005414
In [1360]:
# add back actuals --
prediction_df_United = pd.concat([prediction_df_United,bag_ts_United.to_frame(name='Baggage')],axis=1)
prediction_df_United = prediction_df_United.tail(-1)
prediction_df_United.head()
Out[1360]:
predicted_mean lower_bound upper_bound Baggage
Date
2004-02-01 67.689413 -2111.295560 2246.674386 312
2004-03-01 111.617093 -2058.288275 2281.522460 321
2004-04-01 80.478285 -2088.197694 2249.154264 162
2004-05-01 80.234820 -2087.262471 2247.732112 652
2004-06-01 82.668750 -2084.667914 2250.005414 549
In [1361]:
# calc error
errors_United = prediction_df_United['Baggage'] - prediction_df_United['predicted_mean']
# absolue value of error
errors_abs_United = errors_United.abs()
# -- mape --
mape_United = (errors_abs_United/prediction_df_United['Baggage'].abs()).mean()
# -- rmse
rmse_United = (errors_United**2).mean()**0.5
# -- print it out
print(f'MAPE = {mape_United:.3%}')
print(f'RMSE = {rmse_United:,.3f}')
MAPE = 48.698%
RMSE = 380.126
In [1362]:
# Generate forecast for the next six months
forecast_United = fit_AR1_United.get_forecast(steps=6)
mean_forecast_United = forecast_United.predicted_mean
confidence_intervals_United = forecast_United.conf_int()

# Create a DataFrame with forecasted values and confidence intervals
forecast_df_United = pd.DataFrame({
    'Forecasted_CPI': mean_forecast_United,
    'Lower_Bound': confidence_intervals_United.iloc[:, 0],
    'Upper_Bound': confidence_intervals_United.iloc[:, 1]
})

forecast_df_United
Out[1362]:
Forecasted_CPI Lower_Bound Upper_Bound
2011-01-01 547.621831 -155.670057 1250.913719
2011-02-01 852.413769 134.680399 1570.147140
2011-03-01 509.236264 -282.371505 1300.844032
2011-04-01 303.110520 -499.121741 1105.342781
2011-05-01 325.908946 -494.398479 1146.216371
2011-06-01 562.942855 -262.429269 1388.314978
In [1363]:
# Plot forecast
plt.figure(figsize=(14, 7))
bag_ts_United.plot(label='Observed', legend=True)
mean_forecast_United.plot(label='6-Months Forecast', legend=True)
plt.fill_between(confidence_intervals_United.index,
                 confidence_intervals_United.iloc[:, 0],
                 confidence_intervals_United.iloc[:, 1], color='pink', alpha=0.3)
plt.title("United Airlines Cancelled Flights Forecast")
plt.xlabel("Date")
plt.ylabel("# Cancelled Flights")
plt.xticks(rotation=45)
plt.tight_layout()
plt.legend()
plt.show()